# 	Filename: exitMI_source.R
# 	Description: Source script to produce results based on regressions where `leaderexitsoffice' is the dependent variable using the multiply imputed data sets. Called from exit_driver.R
# 	Author: Barry Hashimoto
# 	Date: December 2019
# 	For: Barry Hashimoto, "Autocratic Consent to International Law: the Case of the International Criminal Court's Jurisdiction," International Organization.

# Set the number of cores for parallel computation. 
NumberCores <- max(availableCores()-2, 1, na.rm = T)

############################################################################################################
# Prepare data.
 # Rename the rule of law variable from VDEM.
   for (z in 1:10){full$imputations[[z]]$ruleoflaw <- full$imputations[[z]]$vdem.ruleoflaw}
 # Drop all observations before 1998(3)
   for (z in 1:10){full$imputations[[z]] <- full$imputations[[z]][full$imputations[[z]]$quarter>=154, ]}  
   
# Assign all countries with max(leaderexitsoffice) = 0 the same country code to increase the efficiency of Firth logit estimation.
  NewCcode <- with(full$imputations[[1]], aggregate(leaderexitsoffice, by = list(ccode=ccode), FUN = max))
  NewCcode$x <- as.factor(ifelse(NewCcode$x==0, 999, NewCcode$ccode))
  names(NewCcode) <- c("ccode", "ccode2")
  for (z in 1:10){full$imputations[[z]] <- merge(full$imputations[[z]], NewCcode, by = "ccode")}  

# Package MI data sets. This code is only used for estimating the Firth logits without parallel processing and will not be run otherwise.
if(FALSE){imputed <- mi(full$imputations[[1]],full$imputations[[2]], full$imputations[[3]],full$imputations[[4]],
  full$imputations[[5]],full$imputations[[6]], full$imputations[[7]],full$imputations[[8]],full$imputations[[9]],
  full$imputations[[10]]); all <- imputationList(datasets = imputed)}

############################################################################################################
 # Prepare data and match.
  data.list <- list()
  for (i in 1:10){
	data.list[[i]] <- with(full$imputations[[i]], data.frame(leaderexitsoffice, logdeaths, ptssum.mean, RomeStatuteRatification, 
	logoilrent, allunemp, loggrowth, x.ucdp.incidence, cowwar, capital.scaled, gdp.scaled, ruleoflaw, democracy, common, mixed, 
	islam, pre98conflict, medianIMR, ccode, ccode2, lcode, year, quarter, quartersinoffice, exit.trend.single, exit.trend.country, quartersinoffice))
	}

# A function to stop excess verbosity when running matchit().
 quiet <- function(x) { 
   sink(tempfile()) 
   on.exit(sink()) 
   invisible(force(x)) 
  }
 
  match.list <- list()
  for (i in 1:10){
	set1 <- data.list[[i]]
    bin <- 1/3 # Bin at 33rd and 67th percentiles, approximately.
	oil.cuts <- quantile(set1$logoilrent, seq(0,1,bin))
	allunemp.cuts <- quantile(set1$allunemp, seq(0,1, bin))
	growth.cuts <- quantile(set1$loggrowth, seq(0,1, bin))
	gdp.cuts <- quantile(set1$gdp.scaled, seq(0,1, bin))
	capital.cuts <- quantile(set1$capital.scaled, seq(0,1, bin))
	rol.cuts <- quantile(set1$ruleoflaw, seq(0,1, bin))
	pts.cuts<- quantile(set1$ptssum.mean, seq(0,1, bin))
	imr.cuts <- quantile(set1$medianIMR, seq(0,1, bin))
	cut.list <- list(allunemp = allunemp.cuts, loggrowth = growth.cuts, logoilrent = oil.cuts, 
	gdp.scaled = gdp.cuts, capital.scaled = capital.cuts, ruleoflaw = rol.cuts, ptssum.mean = pts.cuts, medianIMR = imr.cuts)
	match.list[[i]] <- quiet(matchit(RomeStatuteRatification ~ x.ucdp.incidence + cowwar + logoilrent 
	+ allunemp + loggrowth + gdp.scaled + ruleoflaw + capital.scaled + common + mixed + islam 
	+ pre98conflict + medianIMR + ptssum.mean, data = data.list[[i]], method = "cem" , cutpoints = cut.list, verbose = F))
    }
    
  #summary(match.list[[i]])

############################################################################################################
# Define some useful functions.

# A function to pool the regression results according to Rubin's Rules for multiply imputed data. For use with R models where we get the $coefficients argument in the function's third line, such as glm() and glmer()

pool2 <- function(x, n){
  betas <- list()
  for(i in 1:n){betas[[i]] <- summary(x[[i]])$coefficients[,1] }
  betas <- do.call(rbind, betas)
  vmat <- MIextract(x, fun = vcov)
  ses <- list()
  for(i in 1:n){ses[[i]] <-  sqrt(diag(vmat[[i]]))}
  ses <- do.call(rbind, ses)  
  pooled <- mi.meld(betas[,1:2], ses[,1:2], byrow = T) # Omit the fixed intercepts from the pooling b/c some are NA.
  pooled <- do.call(rbind, pooled)
  pooled <- t(pooled)
  pooled <- cbind(pooled, dnorm(pooled[,1]/pooled[,2]))
  colnames(pooled) <- c("Est", "SE", "p")
  return(pooled)
	}
	
 # A function to pool the regression results according to Rubin's Rules for multiply imputed data. For use with models estimated model "logit" in Zelig.
pool3 <- function(x, n){
  betas <- list()
  for(i in 1:n){betas[[i]] <- coef(x[[i]])}
  betas <- do.call(rbind, betas)
  ses <- list()
  for(i in 1:n){ses[[i]] <-  sqrt(diag(vcov(x[[i]])[[1]]))}
  ses <- do.call(rbind, ses)  
  pooled <- mi.meld(betas, ses, byrow = T)	
  pooled <- do.call(rbind, pooled)
  pooled <- t(pooled)
  pooled <- cbind(pooled, dnorm(pooled[,1]/pooled[,2]))
  colnames(pooled) <- c("Est", "SE", "p")
  return(pooled)
  }

####################################################################################################
# Models where "leaderexitsoffice" is the dependent variable.

suppressPackageStartupMessages(library(brglm)) # Bias-reduced logit estimation appropriate for cases of separation due to country and year dummies.

FirthLogit <- function(x){do.call(brglm,args=list(formula=formula,data=x,family=binomial(link="logit"),method="brglm.fit"))}

formula <- as.formula("leaderexitsoffice ~ RomeStatuteRatification + logoilrent + loggrowth + common + mixed + islam + pre98conflict + x.ucdp.incidence + cowwar + allunemp + gdp.scaled + ruleoflaw + capital.scaled + exit.trend.single + as.factor(ccode2) + as.factor(year)")
model0 <- mclapply(full$imputations, FirthLogit, mc.cores = NumberCores, mc.preschedule = FALSE)

ZeligLogit <- function(x){do.call(zelig,args=list(formula=formula,data=x,model="logit"))}
MatchDataList <- lapply(match.list, match.data)

formula <- as.formula("leaderexitsoffice ~ RomeStatuteRatification + logoilrent + loggrowth + common + mixed + islam + pre98conflict + x.ucdp.incidence + cowwar + allunemp + gdp.scaled + ruleoflaw + capital.scaled + exit.trend.single")
model1 <- mclapply(MatchDataList, ZeligLogit, mc.cores = NumberCores, mc.preschedule = FALSE)

formula <- as.formula("leaderexitsoffice ~ RomeStatuteRatification + logoilrent + loggrowth + common + mixed + islam + pre98conflict + x.ucdp.incidence + cowwar + allunemp + gdp.scaled + ruleoflaw + capital.scaled + exit.trend.country + as.factor(ccode2) + as.factor(year)")
model2 <- mclapply(full$imputations, FirthLogit, mc.cores = NumberCores, mc.preschedule = FALSE)

formula <- as.formula("leaderexitsoffice ~ RomeStatuteRatification + logoilrent + loggrowth + common + mixed + islam + pre98conflict + x.ucdp.incidence + cowwar + allunemp + gdp.scaled + ruleoflaw + capital.scaled + exit.trend.country")
model3 <- mclapply(MatchDataList, ZeligLogit, mc.cores = NumberCores, mc.preschedule = FALSE)


# Alternative, slower method of running the above models.

if(FALSE){
# Full data, logit, shared baseline
 model0 <- with(all, brglm(leaderexitsoffice ~ 
	RomeStatuteRatification + logoilrent + loggrowth 
	+ common + mixed + islam + pre98conflict + x.ucdp.incidence + cowwar 
	+ allunemp + gdp.scaled + ruleoflaw + capital.scaled + exit.trend.single 
	+ as.factor(ccode2) + as.factor(year), 
  family = binomial(link = "logit"), method = "brglm.fit"))
 #pool2(model0,10)
 
# Matched data, logit, shared baseline. Variable "cowwar" omitted due to no variation.
 model1 <- list()
 for (i in 1:10){model1[[i]] <- zelig(formula = leaderexitsoffice ~ 
 	RomeStatuteRatification + logoilrent + loggrowth 
 	+ allunemp + gdp.scaled + ruleoflaw + capital.scaled 
	+ common + mixed + islam + pre98conflict + x.ucdp.incidence 
 	+ medianIMR + ptssum.mean + exit.trend.single, 
 	model = "logit", data = match.data(match.list[[i]]), cite = F)}
  #pool3(model1,10)

# Full data, logit, unit-specific baseline
 model2 <- with(all, brglm(leaderexitsoffice ~ 
	RomeStatuteRatification + logoilrent + loggrowth 
	+ common + mixed + islam + pre98conflict + x.ucdp.incidence + cowwar 
	+ allunemp + gdp.scaled + ruleoflaw + capital.scaled + exit.trend.country
	+ as.factor(ccode2) + as.factor(year), 
  family = binomial(link = "logit"), method = "brglm.fit"))
 #pool2(model2,10)

# Matched data, logit, unit-specific baseline. Variable "cowwar"  omitted due to no variation.
model3 <- list()
for (i in 1:10){model3[[i]] <- zelig(formula = leaderexitsoffice ~ 
	RomeStatuteRatification + logoilrent + loggrowth 
	+ allunemp + gdp.scaled + ruleoflaw + capital.scaled  
	+ common + mixed + islam + pre98conflict + x.ucdp.incidence
    + medianIMR + ptssum.mean + exit.trend.country, 
	model = "logit", data = match.data(match.list[[i]]), cite = F)}
  #pool3(model3,10)
 }


####################################################################################################
# Some more useful functions.
 
# A function to get the mean and SD of the AIC from the Zelig logits
 get.aic.zelig <- function(x,n){
	fit <- vector()
	for(i in 1:n){fit[i] <- from_zelig_model(x[[i]])$aic}
	stats <- c(mean(fit), sd(fit), NA)
	return(stats)
	}

# A function to get the mean and SD of the AIC from logits.
get.aic <- function(x,n){
	fit <- vector()
	for(i in 1:n){fit[i] <- AIC(logLik(x[[i]]))}
	stats <- c(mean(fit), sd(fit), NA)
	return(stats)
	}

# A function to get model output stats for each model in Table 2
 output <- function(x,n){
	mat <- rbind(pool2(x,n)[2,], get.aic(x,n))
	#mat <- signif(mat, 4)               
	rownames(mat) <- c("Coef. of ICC Jurisdiction", "Model AIC")
	return(mat)
	}
		
# A function to get model output stats for each model in Table 2
 output.zelig <- function(x,n){
	mat <- rbind(pool3(x,n)[2,], get.aic.zelig(x,n))
	#mat <- signif(mat, 4)               
	rownames(mat) <- c("Coef. of ICC Jurisdiction", "Model AIC")
	return(mat)
	}
	
# Create and print the table of results for unmatched and matched data with control variables. 
 col1 <- list(output(model0,10), output.zelig(model1,10))
 col1 <- do.call(rbind, col1)
 col2 <- list(output(model2,10), output.zelig(model3,10))
 col2 <- do.call(rbind, col2)
 exit.table <- round(cbind(col1, col2), digits = 3)
 
writeLines("")
writeLines("# Table of results for models of leader exit in states of the selected regime type. The first and second rows have results for models with control variables using the unmatched and matched data sets. These results appear in the manuscript")
print(exit.table)

# Calculate N of all models
 calcN <- function(x,n){
  vec <- vector()
  for(i in 1:n){vec[i] <- dim(from_zelig_model(x[[i]])$data)[1]}
  return(c(mean(vec), sd(vec)))
  }

writeLines("")
writeLines("# N for the unmatched data set of the selected regime type")
 print(dim(data.list[[1]])[1])
 
writeLines("")
writeLines("# Mean and standard dev. of N across the imputed data sets, after matching, for the selected regime type")
 print(calcN(model1, 10))
 
############################################################################################################
# Models without control variables, for the supplementary appendix. 

formula <- as.formula("leaderexitsoffice ~ RomeStatuteRatification + exit.trend.single + as.factor(ccode2) + as.factor(year)")
nocontrol.single <- mclapply(full$imputations, FirthLogit, mc.cores = NumberCores, mc.preschedule = FALSE)

formula <- as.formula("leaderexitsoffice ~ RomeStatuteRatification + exit.trend.country + as.factor(ccode2) + as.factor(year)")
nocontrol.shared <- mclapply(full$imputations, FirthLogit, mc.cores = NumberCores, mc.preschedule = FALSE)

# Alternative slower method to estimate the models above.
if(FALSE){
 nocontrol.single <- with(all, brglm(leaderexitsoffice ~ 
	RomeStatuteRatification + exit.trend.single + as.factor(ccode2) + as.factor(year), 
  family = binomial(link = "logit"), method = "brglm.fit"))	
 nocontrol.shared <- with(all, brglm(leaderexitsoffice ~ 
	RomeStatuteRatification + exit.trend.country + as.factor(ccode2) + as.factor(year), 
  family = binomial(link = "logit"), method = "brglm.fit"))
}


 row1 <- list(output(nocontrol.single, 10), output(nocontrol.shared, 10))
 row1 <- do.call(cbind, row1)
 row1 <- round(row1, 3)
 

############################################################################################################
# Combine statistics.

  full.exit.table <- do.call(rbind, list(row1, exit.table))
  
  writeLines("")
  writeLines("Table of results for models of leader exit in states of the selected regime type. The first row has results for models without control variables (only appears in the appendix). The second and third rows have results for models with control variables using the unmatched and matched data sets; they are replicates of the results just above.")
  print(full.exit.table) 
  
# library(memisc)
# toLatex(full.exit.table, digits = 3)

############################################################################################################
# Table of balance statistics.

# Stats for the matched data alone.

balance.mat.treated <- NULL
for(i in 1:length(match.list)){
balance.mat.treated <- rbind(balance.mat.treated, t(summary(match.list[[i]])$sum.matched[,1]))
}
treated.balance <- cbind(apply(balance.mat.treated,2,mean))
treated.balance <- round(treated.balance, 2)

balance.mat.control <- NULL
for(i in 1:length(match.list)){
balance.mat.control <- rbind(balance.mat.control, t(summary(match.list[[i]])$sum.matched[,2]))
}
control.balance <- cbind(apply(balance.mat.control,2,mean))
control.balance <- round(control.balance, 2)

rownames(treated.balance) <- rownames(control.balance) <- rownames(summary(match.list[[i]])$sum.matched[,1:2])
colnames(treated.balance) <- colnames(control.balance) <- c("Mean")

n.mat <- NULL
for(i in 1:length(match.list)){
n.mat <- rbind(n.mat, summary(match.list[[i]])$nn[2,])
}
N <- c(apply(n.mat,2,mean)[1], apply(n.mat,2,mean)[2])
# Control stats are in the left two columns, then treated
matched.data.stats <- rbind(cbind(control.balance, treated.balance), N)

# Stats for all data.

balance.mat.treated <- NULL
for(i in 1:length(match.list)){
balance.mat.treated <- rbind(balance.mat.treated, t(summary(match.list[[i]])$sum.all[,1]))
}
treated.balance <- cbind(apply(balance.mat.treated,2,mean))
treated.balance <- round(treated.balance, 2)

balance.mat.control <- NULL
for(i in 1:length(match.list)){
balance.mat.control <- rbind(balance.mat.control, t(summary(match.list[[i]])$sum.all[,2]))
}
control.balance <- cbind(apply(balance.mat.control,2,mean))
control.balance <- round(control.balance, 2)

rownames(treated.balance) <- rownames(control.balance) <- rownames(summary(match.list[[i]])$sum.all[,1:2])
colnames(treated.balance) <- colnames(control.balance) <- c("Mean")

n.mat <- NULL
for(i in 1:length(match.list)){
n.mat <- rbind(n.mat, summary(match.list[[i]])$nn[1,])
}
N <- c(apply(n.mat,2,mean)[1], 	apply(n.mat,2,mean)[2])
# Control stats are in the left two columns, then treated
all.data.stats <- rbind(cbind(control.balance, treated.balance), N)

# Make table with the stats for matched and all data.
balance.stats.table <- cbind(matched.data.stats, all.data.stats)

writeLines("")
writeLines("# Balance statistics for states of the selected regime type. Left pair of columns are control and treated for the matched data. Right pair of columns are control and treated for unmatched data.")
print(balance.stats.table)

############################################################################################################
# SATT statistics.
# Zelig SATT simulation. Simulate, pool, calculate. 

 unloadNamespace("arm") # This may conflict with sim in the zelig/matchit packages.
 suppressPackageStartupMessages(library(dplyr))
 
 MatchDataList <- lapply(match.list, match.data)

 formula <- as.formula("leaderexitsoffice ~ RomeStatuteRatification + logoilrent + loggrowth + allunemp + gdp.scaled + ruleoflaw + capital.scaled + x.ucdp.incidence + common + mixed + islam + pre98conflict + medianIMR + ptssum.mean + exit.trend.country")

 GetSATT <- function(x){
    do.call(zelig, args = list(formula = formula, model = "logit", data = x, cite = F)) %>%
    ATT(treatment = "RomeStatuteRatification", treated = 1, num = 20000) %>%
    get_qi(qi="ATT", xvalue = "TE")
	}

 # Make a vector of all SATT simulations.
 att.pooled <- mclapply(MatchDataList, GetSATT, mc.cores=NumberCores, mc.set.seed=100, mc.preschedule=F) %>% unlist
   
  writeLines("")
  writeLines("# Print 95% interval plus plus 25th and 7th percentiles (top) and median absolute deviation (bottom) of the SATT of RomeStatuteRatification on leaderexitsoffice for states of the selected regime type.")
  print(signif(quantile(att.pooled, c(0.025, 0.25, 0.5, 0.75, 0.975)), digits = 2))
  print(signif(mad(att.pooled),2))
  
# At what rate did states of the selected regime type actually exit office in the 1998 Q3 to 2017 Q4 period?
 exit <- data.list[[1]]$leaderexitsoffice
 writeLines("")
 writeLines("# Rate (mean and std. dev.) at which states of the selected regime type actually exited office in the 1998 Q3 to 2017 Q4 period.")
 print(signif(mean(exit), 2))
 print(signif(sd(exit), 2))

#  At what rate did states of the selected regime type that were parties to the Rome Statute actually exit office in the 1998 Q3 to 2017 Q4 period?
 exit <- data.list[[1]]$leaderexitsoffice[data.list[[1]]$RomeStatuteRatification==1]
 writeLines("")
 writeLines("# Rate (mean and std. dev.) at which states of the selected regime type that were parties to the Rome Statute actually exited office in the 1998 Q3 to 2017 Q4 period.")
 print(signif(mean(exit), 2))
 print(signif(sd(exit), 2))

############################################################################################################# 
# Inspect matched observations.
  
  df <- data.frame(match.list[[1]]$model$data, match.list[[1]]$subclass, match.list[[1]]$weights) %>%
  			rename(weight=match.list..1...weights, subclass = match.list..1...subclass) %>% 
  			subset(subset=weight>0, select=c("quarter", "lcode", "year","subclass", "weight")) %>%
  			setorder("subclass", "weight") 			
  mf <- subset(full$imputations[[1]], select = c("quarter","lcode", "country", "leader","RomeStatuteRatification"))
  pairs <- join(df, mf, by = c("quarter", "lcode"), type = "left")
  pairs <- pairs %>% select(subclass, weight, year, RomeStatuteRatification, country, leader,  quarter, lcode)  
  
  # See some matches.
  pairs[which(pairs$subclass==589),]
  pairs[which(pairs$subclass==702),]
  pairs[which(pairs$subclass==78),]



